* Preparation for Spatial Mismatch estimation

* year of separation: one 0/1 indicator per separation year, 2001 through 2005
* (an observation whose sep_year is none of these gets 0 in every indicator)
forvalues yr = 2001/2005 {
    generate sep_`yr' = (sep_year == `yr')
}

* fixed effect for tract by year interaction
* build a string key from the zero-padded h_stfid followed by sep_year,
* then encode that key into a labelled numeric id
generate h_stfidstring = string(h_stfid, "%012.0f")
generate fetys = h_stfidstring + string(sep_year)
encode fetys, generate(fety)
* inspect the first ten rows and the storage type of every intermediate step
list h_stfid h_stfidstring sep_year fetys fety in 1/10
describe h_stfid h_stfidstring sep_year fetys fety

* puma for clustering: 7-character, zero-padded string copy of puma5
generate str7 puma5_s = string(puma5, "%07.0f")
describe puma5 puma5_s

* multiple interactions
* metro-by-year code: 100 * stmet_name_c + (sep_year - 2000)
gen femy=(100*stmet_name_c)+(sep_year-2000)

* one indicator per metro-by-year code; code 201 is kept out of the tabulation
tabulate femy if femy!=201, generate(femy_fe)
sum femy_fe*
* tabulate leaves the indicators missing for the observations it skipped
* (femy==201 or femy missing); set those to 0.  The wildcard covers however
* many indicators tabulate created instead of assuming there are exactly 48,
* which would error with fewer cells and leave missings behind with more.
recode femy_fe* (.=0)
  
* season of separation: 0/1 indicators for sep_seas values 2, 3 and 4
* (no indicator is built for any other value)
forvalues s = 2/4 {
    generate sep_seas_`s' = (sep_seas == `s')
}

* convert tract to string
* %011.0f zero-pads the id to 11 characters so the fixed-position substr()
* calls below always read the same digit positions; plain %11.0f does not
* zero-pad, so an id with fewer than 11 digits would shift every position.
* Ids that already have 11 digits produce exactly the same string as before.
gen stctytract=string(h_stfid,"%011.0f")
* no need for county dummy variable, use central city if necessary
* first 5 characters and first 2 characters of the tract string
gen stcty=substr(stctytract,1,5)
gen st=substr(stctytract,1,2)

* review metro contents: tabulate each geography identifier within stmet_name
bysort stmet_name: tabulate puma1
by stmet_name: tabulate puma5
by stmet_name: tabulate stcty

* create dummies for metro areas: one 0/1 variable per stmet_name value,
* each named after the value it flags
foreach metro in inind midet wimil ilchi ohcol ohcle mnmin nybuf papit {
    generate `metro' = (stmet_name == "`metro'")
}

* median age of housing: year_built_med is recorded as years after 1900,
* so 100 minus that value is the age; the source variable is dropped afterwards
generate build_age_med = 100 - year_built_med
drop year_built_med

* feasibility of transit: 1 only when tract_transit_mpo and tract_transit_census
* both equal 1, otherwise 0
generate transit_feas = (tract_transit_mpo == 1 & tract_transit_census == 1)

* industry sector dummy: sep_sec_c codes 1 through 5 map, in order, to the
* suffixes good, retl, prof, educ, heal
local seccode = 0
foreach sfx in good retl prof educ heal {
    local ++seccode
    generate sec_sep_`sfx' = (sep_sec_c == `seccode')
}

* previous commute time dummy
*   sep_time_0020 = 1 when sep_auto is below 20
*   sep_time_40up = 1 when sep_auto is above 40
* Stata treats a missing value as larger than any number, so a bare
* sep_auto>40 would flag observations with missing sep_auto as 40 and up.
* The !missing() guard keeps both indicators at 0 for those observations.
gen sep_time_0020=0
gen sep_time_40up=0
replace sep_time_0020=1 if sep_auto<20
replace sep_time_40up=1 if sep_auto>40 & !missing(sep_auto)

* new age categories: collapse age_c codes 1-2, 3-4 and 5 into groups 1, 2, 3
* (any other age_c value, including missing, stays at 0)
generate age_c3 = 0
replace age_c3 = 1 if inlist(age_c, 1, 2)
replace age_c3 = 2 if inlist(age_c, 3, 4)
replace age_c3 = 3 if age_c == 5

* other variables: natural log of sep_auto
generate sep_time_ln = ln(sep_auto)

* list of sectors
local seclist all sc1 sc2 sc3 sc4 sc5 gp1 gp2 gp3 gp4 eall
display "`seclist'"

* list of travel times thresholds to begin discounting
local timelist 5 10 15
display "`timelist'"

* produce variables for each sector
foreach sec of local seclist {
    display "`sec'"

    * distance-based indices: logs of each, plus their symmetric ratio
    * (difference divided by the mean of the two)
    local labvar indxx_dist_o_lab_1_`sec'
    local jobvar indxx_dist_d_job_1_`sec'
    generate ln_`labvar' = log(`labvar')
    generate ln_`jobvar' = log(`jobvar')
    generate ratio_dist_`sec'_1 = ///
        (`jobvar'-`labvar')/((`jobvar'+`labvar')/2)

    * cycle through travel times
    foreach time of local timelist {
        display "`sec' `time'"

        * shared name suffix of the index_ and indcs_ variables for this cell
        local cell `sec'_`time'up_ex

        * logs of the auto-based indices
        generate ln_indxx_auto_o_lab_`time'_`sec' = log(indxx_auto_o_lab_`time'_`sec')
        generate ln_indxx_auto_d_job_`time'_`sec' = log(indxx_auto_d_job_`time'_`sec')
        generate ln_index_auto_`cell' = log(index_auto_`cell')

        * calculate effective jobs under feasible transit: start from the
        * predboth value and fall back to the auto value where transit_feas==0
        foreach stem in index indcs {
            generate `stem'_predfeas_`cell' = `stem'_predboth_`cell'
            replace `stem'_predfeas_`cell' = `stem'_auto_`cell' if transit_feas==0
        }

        * symmetric index/indcs ratios for the auto, tran and predboth measures
        foreach mode in auto tran predboth {
            generate ratio_`mode'_`sec'_`time' = ///
                (index_`mode'_`cell'-indcs_`mode'_`cell') ///
                / ///
                ((index_`mode'_`cell'+indcs_`mode'_`cell')/2)
        }

        * feasible-transit ratio: predboth ratio, auto ratio where transit_feas==0
        generate ratio_predfeas_`sec'_`time' = ratio_predboth_`sec'_`time'
        replace ratio_predfeas_`sec'_`time' = ratio_auto_`sec'_`time' if transit_feas==0

        * same symmetric ratio for the predauto and predtran measures
        foreach mode in predauto predtran {
            generate ratio_`mode'_`sec'_`time' = ///
                (index_`mode'_`cell'-indcs_`mode'_`cell') ///
                / ///
                ((index_`mode'_`cell'+indcs_`mode'_`cell')/2)
        }
    }
}

* 3-mile distance ratios: (index - indcs) divided by the mean of the two
generate ratio_dist_all_3 = ///
    (index_dist_all_3mi_ex-indcs_dist_all_3mi_ex)/((index_dist_all_3mi_ex+indcs_dist_all_3mi_ex)/2)

* for the gp* groups the index variable is named by the first three characters
* of the indcs group name (gpsa and gpss use gps, gpoa and gpos use gpo)
foreach csgrp in gpsa gpoa gpss gpos {
    local jogrp = substr("`csgrp'", 1, 3)
    generate ratio_dist_`csgrp'_3 = ///
        (index_dist_`jogrp'_3mi_ex-indcs_dist_`csgrp'_3mi_ex)/((index_dist_`jogrp'_3mi_ex+indcs_dist_`csgrp'_3mi_ex)/2)
}


* alternate CS group, labor force 16 and up from Census
* symmetric ratio of the all-sector index against the pop16lab indcs variable,
* for the auto and predboth measures at the 10 threshold
foreach mode in auto predboth {
    local jovar index_`mode'_all_10up_ex
    local csvar indcs_`mode'_pop16lab_10up_ex
    generate ratio_`mode'_all16_10 = ///
        (`jovar'-`csvar') ///
        / ///
        ((`jovar'+`csvar')/2)
}

* feasible-transit version: predboth ratio, auto ratio where transit_feas==0
generate ratio_predfeas_all16_10 = ratio_predboth_all16_10
replace ratio_predfeas_all16_10 = ratio_auto_all16_10 if transit_feas==0



* scale JO and CS by metro: log of each predfeas index relative to the mean of
* that index within the observation's stmet_name group
bysort stmet_name: egen indexmetro = mean(index_predfeas_all_10up_ex)
generate rel_jo = ln(index_predfeas_all_10up_ex/indexmetro)
bysort stmet_name: egen indcsmetro = mean(indcs_predfeas_all_10up_ex)
generate rel_cs = ln(indcs_predfeas_all_10up_ex/indcsmetro)

* descriptive checks on the ratio and the two relative measures
local measures ratio_predfeas_all_10 rel_jo rel_cs
summarize `measures'
by stmet_name: summarize rel_jo rel_cs
* covariance matrix restricted to qtr==8
correlate `measures' ///
          if qtr==8, covariance
* pcorrmat is not an official Stata command; it is presumably a user-written
* partial-correlation routine - confirm it is installed.  The part() list holds
* every metro dummy except ilchi.
pcorrmat  `measures' ///
          if qtr==8, part(inind midet ohcol ohcle mnmin papit nybuf wimil)

* access ranges: bin ratio_predfeas_all_10 using breaks every 0.25 from -2 to 2;
* egen cut stores the lower break of each interval, so adding 0.125 moves the
* stored value to the interval midpoint
* NOTE(review): egen cut leaves values at or above the last break missing, so a
* ratio of exactly 2 receives no bin - confirm that is intended
egen acc_c = cut(ratio_predfeas_all_10), at(-2(0.25)2)
replace acc_c = acc_c + 0.125

* create variable for subordinate earner
*   0 = married==1 and earntotall above half of earntotallhu
*   1 = married==1 and earntotall at or below half of earntotallhu
*   2 = married==0
*   9 = none of the above applies
* Stata treats a missing value as larger than any number, so without the
* !missing() guards a married observation with missing earntotall or
* earntotallhu would land in category 0 or 1 purely as an artefact of that
* ordering.  Such observations now keep the default code 9.
gen primeearn=9
replace primeearn=0 if married==1 & earntotall>0.5*earntotallhu & !missing(earntotall, earntotallhu)
replace primeearn=1 if married==1 & earntotall<=0.5*earntotallhu & !missing(earntotall, earntotallhu)
replace primeearn=2 if married==0

* total earnings variables: level, natural log, and natural log of the value
* plus one, all built from earntothire
generate earning = earntothire
generate earningln = ln(earntothire)
generate earningln1 = ln(earntothire + 1)
* summaries at qtr==8: first all observations, then only earntothire above zero
foreach v in earning earningln {
    summarize `v' if qtr==8
    summarize `v' if qtr==8 & earntothire>0
}

